In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pandas import DataFrame, Series

In [2]:
# File name of the CSV data set read by pd.read_csv further down.
# No directory component is given, so it is resolved relative to the
# current working directory of the kernel.
fichero_pop_total = "API_SP.POP.TOTL_DS2_es_csv_v2.csv"

In [ ]:


In [3]:
# Load the comma-separated file into a DataFrame.
# skiprows=2 discards the first two lines of the file; header=1 then takes the
# second remaining line as the column names (pandas ignores blank lines when
# counting header rows, since skip_blank_lines defaults to True).
dfpoptotal = pd.read_csv(
    fichero_pop_total,
    sep=',',
    skiprows=2,
    header=1,
)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
dfpoptotal.describe()


Out[4]:
1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... 2008 2009 2010 2011 2012 2013 2014 2015 2016 Unnamed: 61
count 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 2.600000e+02 ... 2.630000e+02 2.630000e+02 2.630000e+02 2.630000e+02 2.620000e+02 2.620000e+02 2.620000e+02 2.620000e+02 0.0 0.0
mean 1.184089e+08 1.199950e+08 1.221120e+08 1.247393e+08 1.273930e+08 1.301080e+08 1.329666e+08 1.358183e+08 1.387221e+08 1.417697e+08 ... 2.708574e+08 2.743544e+08 2.778728e+08 2.814299e+08 2.860500e+08 2.897585e+08 2.934223e+08 2.971317e+08 NaN NaN
std 3.729446e+08 3.773963e+08 3.839358e+08 3.924399e+08 4.010045e+08 4.097919e+08 4.191425e+08 4.284411e+08 4.379496e+08 4.479445e+08 ... 8.604460e+08 8.709231e+08 8.814713e+08 8.921403e+08 9.044301e+08 9.154422e+08 9.264383e+08 9.375048e+08 NaN NaN
min 4.279000e+03 4.453000e+03 4.566000e+03 4.656000e+03 4.748000e+03 4.841000e+03 4.936000e+03 5.033000e+03 5.161000e+03 5.303000e+03 ... 9.788000e+03 9.808000e+03 9.827000e+03 9.844000e+03 9.860000e+03 9.876000e+03 9.893000e+03 9.916000e+03 NaN NaN
25% 5.179030e+05 5.295098e+05 5.418540e+05 5.549710e+05 5.681712e+05 5.724870e+05 5.782962e+05 5.899358e+05 6.290002e+05 6.550080e+05 ... 1.363026e+06 1.420972e+06 1.436706e+06 1.456044e+06 1.409556e+06 1.424658e+06 1.443366e+06 1.464251e+06 NaN NaN
50% 3.669526e+06 3.734208e+06 3.811270e+06 3.901628e+06 4.020265e+06 4.145164e+06 4.235566e+06 4.300506e+06 4.367332e+06 4.449020e+06 ... 9.132589e+06 9.298515e+06 9.490000e+06 9.779391e+06 9.976996e+06 1.008062e+07 1.013376e+07 1.009667e+07 NaN NaN
75% 2.532754e+07 2.611541e+07 2.690508e+07 2.770050e+07 2.847884e+07 2.924675e+07 2.999191e+07 3.059503e+07 3.120239e+07 3.179527e+07 ... 5.016311e+07 5.069532e+07 5.125242e+07 5.183768e+07 5.249698e+07 5.314012e+07 5.390328e+07 5.469198e+07 NaN NaN
max 3.035056e+09 3.076121e+09 3.129064e+09 3.193947e+09 3.259355e+09 3.326054e+09 3.395866e+09 3.465297e+09 3.535512e+09 3.609910e+09 ... 6.758303e+09 6.840956e+09 6.923684e+09 7.006908e+09 7.089452e+09 7.176092e+09 7.260780e+09 7.346633e+09 NaN NaN

8 rows × 58 columns


In [5]:
# Summary statistics restricted to the single column labelled '2015'.
dfpoptotal.loc[:, '2015'].describe()


Out[5]:
count    2.620000e+02
mean     2.971317e+08
std      9.375048e+08
min      9.916000e+03
25%      1.464251e+06
50%      1.009667e+07
75%      5.469198e+07
max      7.346633e+09
Name: 2015, dtype: float64

In [6]:
# value_counts() returns how often each distinct value occurs in the column;
# dropna=True leaves missing values out of the tally.
(
    dfpoptotal['2015']
    .value_counts(dropna=True)
)


Out[6]:
1.744161e+09    2
1.000981e+09    2
7.726681e+07    1
5.957000e+06    1
1.138956e+07    1
2.851370e+07    1
7.748300e+05    1
4.605030e+07    1
7.304578e+06    1
4.900274e+06    1
4.224404e+06    1
4.490541e+06    1
2.515532e+07    1
2.129100e+04    1
3.129950e+07    1
1.440968e+08    1
6.680838e+07    1
5.535002e+06    1
3.474182e+06    1
1.311051e+09    1
3.178100e+04    1
1.849990e+05    1
9.150808e+07    1
1.609956e+08    1
1.850241e+07    1
6.183635e+09    1
6.795936e+07    1
5.495692e+07    1
3.308230e+05    1
8.380400e+06    1
               ..
5.876060e+05    1
1.044600e+05    1
5.061704e+07    1
5.389715e+07    1
5.557200e+04    1
3.880190e+05    1
1.371220e+09    1
3.903238e+07    1
1.580400e+05    1
3.110808e+07    1
4.422143e+06    1
4.240653e+08    1
1.659557e+09    1
8.141314e+07    1
2.096600e+07    1
7.305700e+06    1
1.068250e+05    1
9.181800e+04    1
3.799949e+07    1
1.128572e+07    1
6.802023e+06    1
5.373502e+06    1
6.053650e+08    1
1.889249e+08    1
2.797786e+07    1
1.560275e+07    1
7.268000e+04    1
4.524078e+09    1
3.805006e+07    1
1.000888e+09    1
Name: 2015, dtype: int64

Visualización de datos


In [7]:
# Histogram of the '2015' column on log-log axes.
#
# The column spans several orders of magnitude (describe() above reports a
# minimum near 1e4 and a maximum near 7e9). With the default 10 equal-width
# bins almost every value falls into the first bin, and drawing those linear
# bins on a log-scaled x axis makes the bars misleading. Log-spaced bin edges
# give every bin the same visual width on the log axis.
valores_2015 = dfpoptotal['2015'].dropna()  # missing entries cannot be binned

# 31 edges -> 30 bins, evenly spaced in log10 between the observed min and max.
# NOTE(review): this requires strictly positive values (true for the data
# summarised above); log10 of zero or a negative number would not be finite.
bordes_log = np.logspace(
    np.log10(valores_2015.min()),
    np.log10(valores_2015.max()),
    num=31,
)

ax = valores_2015.plot(
    kind='hist',
    bins=bordes_log,
    rot=70,
    logx=True,
    logy=True,
)

# A figure should stand on its own: give it a title and axis labels.
ax.set_title('Histograma de la columna 2015')
ax.set_xlabel('Valor en 2015 (escala logarítmica)')
ax.set_ylabel('Frecuencia (escala logarítmica)')

# Render the figure.
plt.show()